library(tidyverse)
library(readxl)
library(ggplot2)
library(plotly)
library(ggpubr)
library(ggforce)
library(extrafont)
library(ggrepel)
Results: Details of files to analyse and run information
print(paste0("Report generated on: ", Sys.Date()))
## [1] "Report generated on: 2022-03-25"
RawXLinkProphetOutput <- read.delim("Data/XLinkProphet/iprophet-xl.xls", stringsAsFactors = FALSE)
#generated by running convert convertXLinkProphetXLStoUpload6col.pl iprophet-xl.xls 0.3065 FASTA=C:\TPP\database\yeastproteome.fasta
Raw6colOutput <- read.delim("Data/XLinkProphet/iprophet-xl_6col.txt", header=FALSE, stringsAsFactors = FALSE)
colnames(Raw6colOutput) <- c("Pep1", "Accession1", "PepPos1", "Pep2", "Accession2", "PepPos2", "ProtPos1", "ProtPos2")
SampleName = "04112020_Hpm1KO_Spheroplast"
#probability from XLinkProphet for CSMs to get a 1% FDR. This is different in every sample.
probability_01 = 0.0883
probability_comp_01 = 0.3065
#MethylQuant confidence level
MethylQuantConfidence = "High"
Filtering the peptide prophet results of interest by the probablity as assigned by XLinkProphet
FilteredCSMs <- RawXLinkProphetOutput %>%
filter(probability >= probability_01)
FilteredRedundantCrosslinks <- RawXLinkProphetOutput %>%
filter(composite_probability >= probability_comp_01) %>%
distinct()
Processing to combined IDs and generate different levels of redundancy
ReturnXL_Stripped <- function(PeptideWithMods) {
StrippedPep <- str_remove_all(PeptideWithMods, "\\[(.*?)\\]")
return(StrippedPep)
}
AlphabetizeXLKey <- function(XLPep1, XLPep2) {
Interaction <- if_else(XLPep1 == XLPep2, paste0(XLPep1, "_", XLPep2), if_else(XLPep1 < XLPep2, paste0(XLPep1, "_", XLPep2), paste0(XLPep2, "_", XLPep1)))
return(Interaction)
}
AddKey <- function( input_table, Level) {
nrows <- nrow(input_table)
if(Level == "XL")
{
for (i in 1:nrows) {
input_table$Pep1[i] <- ReturnXL_Stripped(input_table$Pep1[i])
input_table$Pep2[i] <- ReturnXL_Stripped(input_table$Pep2[i])
input_table$XLPep1[i] <- paste0(input_table$Pep1[i], "-", input_table$PepPos1[i], "-", input_table$Accession1[i])
input_table$XLPep2[i] <- paste0(input_table$Pep2[i], "-", input_table$PepPos2[i], "-", input_table$Accession2[i])
input_table$XL_Key[i] <- AlphabetizeXLKey(input_table$XLPep1[i], input_table$XLPep2[i])
}
return(input_table)
}
else if (Level == "URP") {
for (i in 1:nrows) {
input_table$Pep1[i] <- ReturnXL_Stripped(input_table$Pep1[i])
input_table$Pep2[i] <- ReturnXL_Stripped(input_table$Pep2[i])
input_table$ResidueKey1[i] <- paste0(input_table$Accession1[i], "-", input_table$ProtPos1[i])
input_table$ResidueKey2[i] <- paste0(input_table$Accession2[i], "-", input_table$ProtPos2[i])
input_table$URP_Key[i] <- AlphabetizeXLKey(input_table$ResidueKey1[i], input_table$ResidueKey2[i])
}
return(input_table)
}
else if (Level == "PPI") {
for (i in 1:nrows) {
input_table$PPI_Key[i] <- AlphabetizeXLKey(input_table$Accession1[i], input_table$Accession2[i])
}
return(input_table)
}
}
#Useful functions to return the 0-based position of the crosslink mod (325.13 = light PIR stump mass, 333.14 = heavy labelled stump mass)
#or strip the peptide sequence, as exported by XLinkProphet under the heading peptide1/peptide2, of any mods. Useful for XLinkDB upload.
ReturnXL_PepPos <- function(PeptideWithMods) {
V1 <- str_replace(PeptideWithMods, "\\[325\\.13\\]", "x")
V1 <- str_replace(V1, "\\[333\\.14\\]", "x")
V1 <- str_remove_all(V1, "\\[(.*?)\\]")
V1 <- str_locate(V1, "x")[1]
#make 0-based modPos
V1 <- V1 -2
return(V1);
}
CleanXLinkProphet <- function(XlinkProphet_output) {
nrows <- nrow(XlinkProphet_output)
for (i in 1:nrows) {
XlinkProphet_output$Accession1[i] <- str_remove(str_sub(XlinkProphet_output$protein1[i], 1, 9), "sp\\|" )
XlinkProphet_output$Accession2[i] <- str_remove(str_sub(XlinkProphet_output$protein2[i], 1, 9), "sp\\|" )
XlinkProphet_output$PepPos1[i] <- ReturnXL_PepPos(XlinkProphet_output$peptide1[i])
XlinkProphet_output$PepPos2[i] <- ReturnXL_PepPos(XlinkProphet_output$peptide2[i])
XlinkProphet_output$Pep1[i] <- ReturnXL_Stripped(XlinkProphet_output$peptide1[i])
XlinkProphet_output$Pep2[i] <- ReturnXL_Stripped(XlinkProphet_output$peptide2[i])
}
return(XlinkProphet_output);
}
CleanXlinkProphetTable <- AddKey(CleanXLinkProphet(FilteredCSMs), "XL")
FilteredRedundantCrosslinks <- AddKey(CleanXLinkProphet(FilteredRedundantCrosslinks), "XL")
Clean6colTable <- AddKey(Raw6colOutput, "PPI")
Clean6colTable <- AddKey(Clean6colTable, "URP")
Clean6colTable <- AddKey(Clean6colTable, "XL")
Clean6colTable <- Clean6colTable %>% select(XL_Key, URP_Key, PPI_Key, ResidueKey1, ResidueKey2)
AnnotatedXLTable <- distinct(full_join(CleanXlinkProphetTable, Clean6colTable, by = "XL_Key" ))
write.table(AnnotatedXLTable, file = paste0(SampleName, "_AnnotatedXLTable.txt"), sep = "\t", row.names = FALSE, quote = FALSE)
Annotated information on PPI novelty
APID <- read.delim("Data/APID/559292_noISI_Q3_June2021.txt", stringsAsFactors=FALSE)
APID <- APID %>%
filter(ExpEvidences >= 1) %>%
rename(Accession1 = UniprotID_A, Accession2 = UniprotID_B) %>%
mutate(KnownAPID = TRUE)
APID <- AddKey(APID, "PPI")
PPIs <- AnnotatedXLTable %>%
filter(intra != 1) %>%
filter(!is.na(PPI_Key)) %>%
group_by(PPI_Key) %>%
summarise(NumURPs = n_distinct(URP_Key), NumCSMs = n()) %>%
distinct()
PPI_URPs <- AnnotatedXLTable %>%
filter(intra != 1) %>%
filter(!is.na(PPI_Key)) %>%
select(PPI_Key, URP_Key) %>%
distinct()
PPIs <- left_join(PPIs, APID, by = "PPI_Key")
PPIs <- PPIs %>%
mutate(KnownAPID = replace_na(KnownAPID, FALSE)) %>%
separate(PPI_Key, into = c("AccessionA", "AccessionB"), sep = "_",remove= FALSE )
PPI_URPs <- left_join(PPI_URPs, PPIs, by = "PPI_Key")
write.table(PPIs, file = "IdentifiedPPIs.txt", sep = "\t", quote = FALSE, row.names = FALSE)
write.table(PPI_URPs, file = "IdentifiedPPI_URPs.txt", sep = "\t", quote = FALSE, row.names = FALSE)
Generating a table of URPs.
URPs <- AnnotatedXLTable %>%
filter(composite_probability >= probability_comp_01) %>%
mutate(CrosslinkType = if_else(intra == 1, "Intra", "Inter")) %>%
select(URP_Key, CrosslinkType) %>%
separate(URP_Key, sep = "_", into = c("Residue1", "Residue2"), remove = FALSE) %>%
separate(Residue1, sep = "-", into = c("Accession1", "Lys1"), remove = FALSE) %>%
separate(Residue2, sep = "-", into = c("Accession2", "Lys2"), remove = FALSE) %>%
distinct()
write.table(URPs, file = "URPs.txt", row.names = FALSE, quote = FALSE, sep = "\t")
Make an input table for XLinkDB
XLinkDBTable <- AnnotatedXLTable %>%
select(Pep1, Accession1, PepPos1, Pep2, Accession2, PepPos2) %>%
distinct()
write.table(XLinkDBTable, file = paste0("XLinkDBInput_", SampleName, ".txt"), sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)
Generating a table of CSMs compatible for analysis with MethylQuant Converting the calculated crosslink monoisotopic parent mass (theoretically determined by Comet when using C13 isotope error in search) to a m/z and calculate the theoretical isotope labelling pair. This is useful for use with MethylQuant, where you provide a list of m/z, charges, scans and mass differences (actually, a m/z difference calculated by taking into account the number of arginines and lysines present in the sequence, and the parent charge). Searching for m/z’s of real spectral identifications, and also for masses in other replicates (matches between runs idea).
TableForMethylQuant <- AnnotatedXLTable %>%
mutate(isLIGHT_ID = str_detect(peptide1, "325.13")) %>%
mutate(Sequence = str_replace_all(composite_id, "_", "")) %>%
mutate(Sequence = str_replace_all(Sequence, "x", "")) %>%
mutate(NumLysines = str_count(Sequence, "K")) %>%
mutate(NumArginines = str_count(Sequence, "R")) %>%
mutate(mono_parent_mass = peptide1_mass + peptide2_mass + 693.399601) %>%
mutate(HeavyMZshift = ((NumLysines * 8.014199) + (NumArginines * 10.008269))/parent_charge) %>%
mutate(MZ = (mono_parent_mass + (parent_charge * 1.00728))/parent_charge) %>%
mutate(MZShift = ifelse(isLIGHT_ID == FALSE, HeavyMZshift * -1, HeavyMZshift)) %>%
mutate(Modifications = "") %>%
rename(Charge = `parent_charge`) %>%
rename("Calc m/z" = MZ) %>%
rename("Mass Difference" = MZShift) %>%
mutate(realID = TRUE)
nrows <- nrow(TableForMethylQuant)
for (i in 1:nrows) {
TableForMethylQuant$SpecID[i] <- tail(strsplit(TableForMethylQuant$spectrum[i],"\\\\")[[1]],1)
TableForMethylQuant$'Data File'[i] <- paste0(str_split_fixed(TableForMethylQuant$SpecID[i], "\\.", n =4)[,1], ".raw")
TableForMethylQuant$'Start Scan'[i] <- str_split_fixed(TableForMethylQuant$SpecID[i], "\\.", n =4)[,2]
}
TableForMethylQuant <- TableForMethylQuant %>%
filter(!is.na(`Data File`))
DuplicateRV_1 <- TableForMethylQuant %>%
filter(!(str_detect(`Data File`, "RV_") & str_detect(`Data File`, "_1.raw"))) %>%
mutate(`Data File` = str_replace(`Data File`, "FWD", "RV")) %>%
mutate(`Data File` = str_replace(`Data File`, "_2.raw", "_1.raw")) %>%
select(Accession1:`Start Scan`, peptide1_mass, peptide2_mass, parent_mass, Charge) %>%
select(-SpecID) %>%
mutate(realID = FALSE)
DuplicateRV_2 <- TableForMethylQuant %>%
filter(!(str_detect(`Data File`, "RV_") & str_detect(`Data File`, "_2.raw"))) %>%
mutate(`Data File` = str_replace(`Data File`, "FWD", "RV")) %>%
mutate(`Data File` = str_replace(`Data File`, "_1.raw", "_2.raw")) %>%
select(Accession1:`Start Scan`, peptide1_mass, peptide2_mass, parent_mass, Charge) %>%
select(-SpecID) %>%
mutate(realID = FALSE)
DuplicateFWD_1 <- TableForMethylQuant %>%
filter(!(str_detect(`Data File`, "FWD_") & str_detect(`Data File`, "_1.raw"))) %>%
mutate(`Data File` = str_replace(`Data File`, "RV", "FWD")) %>%
mutate(`Data File` = str_replace(`Data File`, "_2.raw", "_1.raw")) %>%
select(Accession1:`Start Scan`, peptide1_mass, peptide2_mass, parent_mass, Charge) %>%
select(-SpecID) %>%
mutate(realID = FALSE)
DuplicateFWD_2 <- TableForMethylQuant %>%
filter(!(str_detect(`Data File`, "FWD_") & str_detect(`Data File`, "_2."))) %>%
mutate(`Data File` = str_replace(`Data File`, "RV", "FWD")) %>%
mutate(`Data File` = str_replace(`Data File`, "_1.raw", "_2.raw")) %>%
select(Accession1:`Start Scan`, peptide1_mass, peptide2_mass, parent_mass, Charge) %>%
select(-SpecID) %>%
mutate(realID = FALSE)
TableForMethylQuant <- bind_rows(TableForMethylQuant, DuplicateRV_1, DuplicateRV_2, DuplicateFWD_1, DuplicateFWD_2)
TableForMethylQuant <- TableForMethylQuant %>%
select(Accession1:`Start Scan`, peptide1_mass, peptide2_mass, parent_mass, Charge) %>%
distinct() %>%
filter(!is.na(`Data File`)) %>%
filter(!is.na(Sequence))
TableForMethylQuant <- distinct(TableForMethylQuant)
write.csv(TableForMethylQuant, file = paste0("TableForMethylQuant_IDsDuplicated", SampleName, ".csv"), row.names = FALSE, quote = TRUE)
Protein level normalisation factor extrapolation. Taking the normalisation factor for crosslink ratios by extrapolating normalisation factor from MaxQuant results of the matched proteome analysis.
proteinGroupsFWD <- read.delim("Data/MaxQuant/FW/proteinGroups.txt", stringsAsFactors=FALSE)
proteinGroupsRV <- read.delim("Data/MaxQuant/RV/proteinGroups.txt", stringsAsFactors=FALSE)
proteinGroupsFWD <- proteinGroupsFWD %>%
mutate(Replicate = "FWD")
proteinGroupsRV <- proteinGroupsRV %>%
mutate(Replicate = "RV")
ProteinswithRatiosFWD <- proteinGroupsFWD %>%
filter(Ratio.H.L<100)
FWD.meanProteinH.L = mean(ProteinswithRatiosFWD$Ratio.H.L)
FWD.meanProteinH.L
## [1] 0.6823332
ProteinswithRatiosRV <- proteinGroupsRV %>%
filter(Ratio.H.L<100)
RV.meanProteinH.L = mean(ProteinswithRatiosRV$Ratio.H.L)
RV.meanProteinH.L
## [1] 0.6504095
MethylQuant output cleaning, to check the quantified species match input by comparing PPM, and also filtering quanification above a score of 10.
MethylQuantOutput <- read_csv("Data/MethylQuant/TableForMethylQuant_IDsDuplicated04112020_Hpm1KO_Spheroplast_MethylQuant.csv")
## Rows: 33733 Columns: 67
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): Accession1, Accession2, Pep1, Pep2, XLPep1, XLPep2, XL_Key, URP_Ke...
## dbl (49): PepPos1, PepPos2, NumLysines, NumArginines, mono_parent_mass, Heav...
## lgl (3): isLIGHT_ID, Modifications, realID
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
MethylQuantOutputLight <- MethylQuantOutput %>%
filter(isLIGHT_ID == TRUE) %>%
mutate(LightMZ =`Calc m/z`) %>%
mutate(MethylQuantMassDiffLight = ((`Light m/z 1 IsotopeCorrelation`- LightMZ))) %>%
mutate(MethylQuantMassDiffLightPPM = abs(((`Light m/z 1 IsotopeCorrelation`- LightMZ)/LightMZ)*1000000) ) %>%
mutate(HeavyMZ = `Calc m/z` + `Mass Difference`) %>%
mutate(MethylQuantMassDiffHeavy = ((`Heavy m/z 1 IsotopeCorrelation`- HeavyMZ))) %>%
mutate(MethylQuantMassDiffHeavyPPM = abs(((`Heavy m/z 1 IsotopeCorrelation`- HeavyMZ)/HeavyMZ)*1000000) )
MethylQuantOutputHeavy <- MethylQuantOutput %>%
filter(isLIGHT_ID == FALSE) %>%
mutate(LightMZ = `Calc m/z` + `Mass Difference`) %>%
mutate(MethylQuantMassDiffLight = ((`Light m/z 1 IsotopeCorrelation`- LightMZ))) %>%
mutate(MethylQuantMassDiffLightPPM = abs(((`Light m/z 1 IsotopeCorrelation`- LightMZ)/LightMZ)*1000000) ) %>%
mutate(HeavyMZ = `Calc m/z`) %>%
mutate(MethylQuantMassDiffHeavy = ((`Heavy m/z 1 IsotopeCorrelation`- HeavyMZ))) %>%
mutate(MethylQuantMassDiffHeavyPPM = abs(((`Heavy m/z 1 IsotopeCorrelation`- HeavyMZ)/HeavyMZ)*1000000) )
MethylQuantOutput <- bind_rows(MethylQuantOutputLight, MethylQuantOutputHeavy)
MethylQuantOutput <- MethylQuantOutput %>%
mutate(Iso2Light = LightMZ + (1.00866491588/Charge)) %>%
mutate(Iso3Light = Iso2Light + (1.00866491588/Charge)) %>%
mutate(Iso2LightPPM = ((Iso2Light - `Light m/z 2 IsotopeCorrelation`)/Iso2Light)*100000) %>%
mutate(Iso3LightPPM = ((Iso3Light - `Light m/z 3 IsotopeCorrelation`)/Iso3Light)*100000) %>%
mutate(Iso2Heavy = HeavyMZ + (1.00866491588/Charge)) %>%
mutate(Iso3Heavy = Iso2Heavy + (1.00866491588/Charge)) %>%
mutate(Iso2HeavyPPM = ((Iso2Heavy - `Heavy m/z 2 IsotopeCorrelation`)/Iso2Heavy)*100000) %>%
mutate(Iso3HeavyPPM = ((Iso3Heavy- `Heavy m/z 3 IsotopeCorrelation`)/Iso3Heavy)*100000)
MethylQuantOutput <- MethylQuantOutput %>%
mutate(Replicate = if_else(str_detect(`Data File`, "RV_"), "RV", "FWD")) %>%
mutate(XL_ID = row_number())
PepsWithRatios <- MethylQuantOutput %>%
filter(!is.na(`H/L Ratio #2`)) %>%
filter(MethylQuantMassDiffHeavyPPM <= 10) %>%
filter(MethylQuantMassDiffLightPPM <= 10) %>%
filter(`MethylQuant Score` > 10)
Remove crosslinked peptides containing methionine oxidation, which may interfere with quantification
MethioninePeps <- AnnotatedXLTable %>%
filter(str_detect(composite_id, "147.04"))
for (i in 1:NROW(MethioninePeps)) {
MethioninePeps$spectrum[i] = tail(strsplit(MethioninePeps$spectrum[i],"\\\\")[[1]],1)
}
PepsWithRatios <- anti_join(PepsWithRatios, MethioninePeps, by = c("SpecID" = "spectrum"))
FilteredRedundantCrosslinks <- FilteredRedundantCrosslinks %>%
select(XL_Key) %>%
mutate(ValidCrosslink = TRUE) %>%
distinct()
PepsWithRatios <- inner_join(PepsWithRatios, FilteredRedundantCrosslinks, by = "XL_Key" )
PepsWithRatios <- distinct(PepsWithRatios)
Normalising SILAC ratios using protein level normalisation factor, which was was the mean protein H/L ratio from matched proteomics experiments. Averaging URP ratios to the biological level, and only considering URP that were quantified in both replicates. Annotating information about key covariates for use BL multiple testing correction.
URPsQuantified <- PepsWithRatios %>%
select(XL_Key, URP_Key, PPI_Key, `Data File`, Replicate, `H/L Ratio #2`, `MethylQuant Score`,`# Good Elution Profile Correlations`) %>%
#picking the best ratio measurement for cross-linked peptides across technical replicates
group_by(XL_Key, URP_Key, PPI_Key, `Data File`, Replicate) %>%
top_n(n = 1, wt = `MethylQuant Score`) %>%
distinct() %>%
ungroup() %>%
#performing normalization to account for uneven mixing
mutate(NormCrosslinkRatio = if_else(Replicate == "RV", `H/L Ratio #2`/RV.meanProteinH.L, `H/L Ratio #2`/FWD.meanProteinH.L)) %>%
#log transforming ratios
mutate(NormCrosslinkLog2Ratio = log2(NormCrosslinkRatio)) %>%
#accounting for label swap
mutate(NormCrosslinkLog2_KO_WT_Ratio = if_else(Replicate == "RV", NormCrosslinkLog2Ratio * -1, NormCrosslinkLog2Ratio )) %>%
#summarising to biological replicate level
group_by(URP_Key, PPI_Key, Replicate) %>%
summarise(
#collapsing redundant ratios for a URP to one ratio per biological replicate
AvgCorrectXLRatio = mean(NormCrosslinkLog2_KO_WT_Ratio),
NumBioReplicates = n_distinct(Replicate),
#collecting data for covariates for BL analysis, across technical replicate measurements
NumRatios = n(),
All_Tech_Rep_Ratios = list(NormCrosslinkLog2_KO_WT_Ratio),
MaxMethylQuantScore = max(`MethylQuant Score`),
MaxNumberGoodElutionProfile = max(`# Good Elution Profile Correlations`)) %>%
ungroup() %>%
separate(URP_Key, c("ResidueKey1", "ResidueKey2"), sep = "_", remove = FALSE) %>%
separate(ResidueKey1, c("Accession1", "Residue1"), sep = "-", remove = FALSE) %>%
separate(ResidueKey2, c("Accession2", "Residue2"), sep = "-", remove = FALSE) %>%
filter(NumRatios > 0)
## `summarise()` has grouped output by 'URP_Key', 'PPI_Key'. You can override
## using the `.groups` argument.
URPsQuantifiedInBoth <- URPsQuantified %>%
#collapsing replicate ratios
group_by(URP_Key, PPI_Key, Accession1, Accession2) %>%
summarise(MeanLog2Ratio = mean(AvgCorrectXLRatio),
sd = sd(AvgCorrectXLRatio),
#for t-test
Ratios = list(AvgCorrectXLRatio),
NumBioReplicates = n_distinct(Replicate),
#biological replicate mean ratios
RatiosWritten = paste(round(AvgCorrectXLRatio, digits = 2), collapse=',' ),
#all ratios collected
AllQuantifiedRatios = list(flatten_dbl(All_Tech_Rep_Ratios)),
#generating finalised covariates for BL correction
sd_AllQuantifiedRatios = sd(flatten_dbl(All_Tech_Rep_Ratios)),
n_AllQuantifiedRatios = sum(NumRatios),
max_n_GoodIsoElutionProfiles_AllQuantifiedRatios = max(MaxNumberGoodElutionProfile)
) %>%
ungroup() %>%
#only taking URPs successfully quantified in both replicates
filter(NumBioReplicates == 2)
## `summarise()` has grouped output by 'URP_Key', 'PPI_Key', 'Accession1'. You can
## override using the `.groups` argument.
Performing two-tailed, unpaired t-tests to compare crosslink ratios to a ratio value of 0 (no change).
URPsQuantifiedInBoth$Pval = NA
for (i in 1:NROW(URPsQuantifiedInBoth)){
Ratios = unlist(URPsQuantifiedInBoth$Ratios[i])
TTestResult = t.test(Ratios, mu = 0, alternative = "two.sided")
URPsQuantifiedInBoth$Pval[i] = TTestResult$p.value[1]
}
Performing multiple testing correction using the BL method from the swFDR package, which uses covariate measures to calculated a q value for multiple testing correction
covariates<- URPsQuantifiedInBoth %>%
ungroup() %>%
select(sd_AllQuantifiedRatios, n_AllQuantifiedRatios, max_n_GoodIsoElutionProfiles_AllQuantifiedRatios)
library(swfdr)
lm <- lm_pi0(URPsQuantifiedInBoth$Pval, X=covariates, smooth.df = 2, type = "linear", smoothing = "smooth.spline")
qVal <- lm_qvalue(URPsQuantifiedInBoth$Pval, pfdr = TRUE, pi0 = lm,smoothing = "smooth.spline")
URPsQuantifiedInBoth$qvalue <- qVal$qvalues
URPsQuantifiedInBoth %>%
filter(qvalue <= 0.05) %>%
filter(abs(MeanLog2Ratio) >= 0.3)
Annotating information about proteins from UNIPROT
UNIPROT <- read.delim("Data/UniProt/UNIPROT_Yeast.txt", stringsAsFactors=FALSE)
UNIPROT <- UNIPROT %>%
mutate(Gene.names.primary.1 = paste0(str_to_sentence(Gene.names...primary.., locale = "en"), "p")) %>%
rename(Entry.name.1 = Entry.name) %>%
select(Entry, Entry.name.1, Gene.names.primary.1)
URPsQuantifiedInBoth <- left_join(URPsQuantifiedInBoth, UNIPROT, by = c("Accession1" = "Entry"))
UNIPROT <- UNIPROT %>%
rename(Entry.name.2 = Entry.name.1,
Gene.names.primary.2 = Gene.names.primary.1)
URPsQuantifiedInBoth <- left_join(URPsQuantifiedInBoth, UNIPROT, by = c("Accession2" = "Entry"))
Generating volcano plot. Purposely excluding one extreme value for visibility sake
ForVolcano <- URPsQuantifiedInBoth %>%
mutate(log2qvalue = -1* log2(qvalue)) %>%
mutate(Direction = case_when(
(qvalue <= 0.05) & (MeanLog2Ratio >= 0.3) ~ "Up",
(qvalue <= 0.05) & (MeanLog2Ratio <= -0.3) ~ "Down",
(qvalue > 0.05) ~"Not")) %>%
mutate(LabelName = paste0(Gene.names.primary.1, "-\n", Gene.names.primary.2)) %>%
#mutate(LabelName = Gene.names.primary.1) %>%
mutate(Label = if_else(((qvalue< 0.05) & (abs(MeanLog2Ratio) >= 0.3)), TRUE, FALSE)) %>%
mutate(Alpha = if_else(((qvalue< 0.05) & (abs(MeanLog2Ratio) >= 0.3)), TRUE, FALSE))
G <- ggplot(ForVolcano, aes(x = MeanLog2Ratio, y = log2qvalue, color = Direction , alpha = Alpha, label = LabelName)) +
geom_jitter() +
#geom_text_repel(data = subset(ForVolcano, Label == TRUE), size = 3) +
scale_color_manual(values = c( "#0000FF", "#bababa", "#D90000")) +
theme_bw() +
theme(legend.position = "none",
text=element_text(size=10, family="Arial"),
panel.border = element_rect(fill=NA, colour = "black", size=1),
panel.grid.minor = element_blank()) +
xlim(-2.,2.3)#+
#ylim(0, 2)
#scale_y_continuous(trans=scales::pseudo_log_trans(base = 10))
ggplotly(G)
## Warning: Using alpha for a discrete variable is not advised.
ggsave(filename = "CrosslinkVolcano.pdf", device = cairo_pdf,
width = 3, height = 3, units = "in")
## Warning: Using alpha for a discrete variable is not advised.
## Warning: Removed 37 rows containing missing values (geom_point).
Grouping URPs by signifcance level
SignificantlyChanging_URPs <- URPsQuantifiedInBoth %>%
filter(qvalue <= 0.05) %>%
filter(abs(MeanLog2Ratio) >= 0.3)
SignificantlyChanging_URPs
NonChanging_URPS <- URPsQuantifiedInBoth %>%
filter(qvalue > 0.05)
NonChanging_URPS
Annoting protein level change to changing URPs
proteinGroupsCombined <- bind_rows(proteinGroupsFWD, proteinGroupsRV)
filteredProteinGroups <- proteinGroupsCombined %>%
separate_rows(Majority.protein.IDs, sep =";") %>%
filter(!is.na(Ratio.H.L.normalized)) %>%
filter(Ratio.H.L.variability.... < 30) %>%
filter(!str_detect(Protein.IDs, "CON_")) %>%
filter(!str_detect(Protein.IDs, "REV_")) %>%
mutate(Accession1 = str_remove(str_sub(Majority.protein.IDs, 1, 9), "sp\\|" )) %>%
rename(NormProteinRatio = Ratio.H.L.normalized) %>%
mutate(NormProteinLog2Ratio = log2(NormProteinRatio)) %>%
mutate(NormProteinLog2_KO_WT_ratio = if_else(Replicate == "RV", NormProteinLog2Ratio * -1, NormProteinLog2Ratio)) %>%
select(NormProteinLog2_KO_WT_ratio, Accession1, Fasta.headers, Ratio.H.L, Ratio.H.L.variability...., Ratio.H.L.count, Replicate)
#average over replicates
MeanRatioProteinLevel <- filteredProteinGroups %>%
group_by(Accession1) %>%
summarise(Mean_NormProteinLog2_KO_WT_ratio.Protein.1 = mean(NormProteinLog2_KO_WT_ratio))
SignificantlyChanging_URPs <- left_join(SignificantlyChanging_URPs, MeanRatioProteinLevel, by = c("Accession1" = "Accession1"))
MeanRatioProteinLevel <- MeanRatioProteinLevel %>%
rename(Mean_NormProteinLog2_KO_WT_ratio.Protein.2 = Mean_NormProteinLog2_KO_WT_ratio.Protein.1)
SignificantlyChanging_URPs <- left_join(SignificantlyChanging_URPs, MeanRatioProteinLevel, by = c("Accession2" = "Accession1"))
Comparing URP ratios to protein level changes, two-tailed, unpaired t-test, with “fdr” method of multiple testing comparison
SignificantlyChanging_URPs_withProteinQuant <- SignificantlyChanging_URPs %>%
filter(!is.na(Mean_NormProteinLog2_KO_WT_ratio.Protein.1)) %>%
filter(!is.na(Mean_NormProteinLog2_KO_WT_ratio.Protein.2))
for (i in 1:NROW(SignificantlyChanging_URPs_withProteinQuant)){
Ratios = unlist(SignificantlyChanging_URPs_withProteinQuant$Ratios[i])
Test.1 = t.test(Ratios, mu = SignificantlyChanging_URPs_withProteinQuant$Mean_NormProteinLog2_KO_WT_ratio.Protein.1[i], alternative = "two.sided")
SignificantlyChanging_URPs_withProteinQuant$Pval.againstProtein.1[i] = Test.1$p.value[1]
Test.2 = t.test(Ratios, mu = SignificantlyChanging_URPs_withProteinQuant$Mean_NormProteinLog2_KO_WT_ratio.Protein.2[i], alternative = "two.sided")
SignificantlyChanging_URPs_withProteinQuant$Pval.againstProtein.2[i] = Test.2$p.value[1]
}
## Warning: Unknown or uninitialised column: `Pval.againstProtein.1`.
## Warning: Unknown or uninitialised column: `Pval.againstProtein.2`.
covariates<- SignificantlyChanging_URPs_withProteinQuant %>%
ungroup() %>%
select(sd_AllQuantifiedRatios, n_AllQuantifiedRatios, max_n_GoodIsoElutionProfiles_AllQuantifiedRatios)
library(swfdr)
lm <- lm_pi0(SignificantlyChanging_URPs_withProteinQuant$Pval.againstProtein.1, X=covariates, smooth.df = 2, smoothing = "smooth.spline")
## Warning: maximal p is smaller than maximal lambda
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
qVal <- lm_qvalue(SignificantlyChanging_URPs_withProteinQuant$Pval.againstProtein.1, pfdr = TRUE, pi0 = lm)
SignificantlyChanging_URPs_withProteinQuant$qvalue_Protein1 <- qVal$qvalues
lm <- lm_pi0(SignificantlyChanging_URPs_withProteinQuant$Pval.againstProtein.2, X=covariates, smooth.df = 2, smoothing = "smooth.spline")
qVal <- lm_qvalue(SignificantlyChanging_URPs_withProteinQuant$Pval.againstProtein.2, pfdr = TRUE, pi0 = lm)
SignificantlyChanging_URPs_withProteinQuant$qvalue_Protein2 <- qVal$qvalues
Generating a list of significantly changing URPs (at crosslink, and compared to protein level) for figure making in Cytoscape
ForFig <- SignificantlyChanging_URPs_withProteinQuant %>%
filter(qvalue_Protein1 <= 0.05) %>%
filter(qvalue_Protein2 <= 0.05) %>%
select(-Ratios, - AllQuantifiedRatios) %>%
separate(URP_Key, sep = "_", into = c("node1", "node2"), remove = FALSE)
MeanRatioProteinLevel <- MeanRatioProteinLevel %>%
mutate(Proteinlog2 = Mean_NormProteinLog2_KO_WT_ratio.Protein.2)
ForNodes <- as_data_frame(c(ForFig$node1, ForFig$node2))
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
ForNodes <- ForNodes %>%
separate(value, into = c("Protein", "Residue"), remove = FALSE, sep = "-") %>%
rename(Node = value)
write.table(MeanRatioProteinLevel, file = "ProteinsForCytoscape.txt", sep = "\t", row.names = FALSE, quote = FALSE)
write.table(ForNodes, file = "NodeForCytoscape.txt", sep = "\t", row.names = FALSE, quote = FALSE)
write.table(ForFig, file = "CrosslinksForCytoscape.txt", sep = "\t", row.names = FALSE, quote = FALSE)
Generating supp table info
ForSupp <- URPsQuantifiedInBoth %>%
select(URP_Key, Gene.names.primary.1, Gene.names.primary.2, MeanLog2Ratio, sd, RatiosWritten, Pval, qvalue, ) %>%
separate(URP_Key, into = c("A", "B"), sep = "_") %>%
separate(A, into = c("Protein.1", "Residue.1"), sep = "-") %>%
separate(B, into = c("Protein.2", "Residue.2"), sep = "-") %>%
rename(mean.URP.log2ratio =MeanLog2Ratio,
sd.URP.log2ratios = sd,
biologicalreplicates.log2ratios = RatiosWritten,
pvalue = Pval) %>%
mutate(mean.URP.log2ratio = round(mean.URP.log2ratio, 2),
sd.URP.log2ratios = round(sd.URP.log2ratios, 2),
pvalue = round(pvalue,2),
qvalue = round(qvalue, 2))
MeanRatioProteinLevel <- MeanRatioProteinLevel %>%
select(Accession1, Proteinlog2) %>%
mutate(protein1.log2ratio = round(Proteinlog2, 2)) %>%
select(-Proteinlog2)
ForSupp <- left_join(ForSupp, MeanRatioProteinLevel, by =c("Protein.1" ="Accession1" ))
MeanRatioProteinLevel <- MeanRatioProteinLevel %>%
rename(protein2.log2ratio = protein1.log2ratio)
ForSupp <- left_join(ForSupp, MeanRatioProteinLevel, by =c("Protein.2" ="Accession1" ))
write.table(ForSupp, file = "Tab1_TableS4.txt", row.names = FALSE, quote = FALSE, sep = "\t")
URPsSigForSupp <- SignificantlyChanging_URPs_withProteinQuant %>%
filter(qvalue_Protein1 <= 0.05) %>%
filter(qvalue_Protein2 <= 0.05) %>%
select(URP_Key, Gene.names.primary.1, Gene.names.primary.2, MeanLog2Ratio, sd,RatiosWritten, Pval, qvalue, Mean_NormProteinLog2_KO_WT_ratio.Protein.1, Mean_NormProteinLog2_KO_WT_ratio.Protein.2, Pval.againstProtein.1, Pval.againstProtein.2, qvalue_Protein1, qvalue_Protein2) %>%
separate(URP_Key, into = c("A", "B"), sep = "_") %>%
separate(A, into = c("Protein.1", "Residue.1"), sep = "-") %>%
separate(B, into = c("Protein.2", "Residue.2"), sep = "-") %>%
rename(mean.URP.log2ratio =MeanLog2Ratio,
sd.URP.log2ratios = sd,
biologicalreplicates.log2ratios = RatiosWritten,
pvalue = Pval,
protein1.log2ratio = Mean_NormProteinLog2_KO_WT_ratio.Protein.1,
protein2.log2ratio = Mean_NormProteinLog2_KO_WT_ratio.Protein.2,
protein1.URPtoProt.pvalue = Pval.againstProtein.1,
protein2.URPtoProt.pvalue = Pval.againstProtein.2,
protein1.URPtoProt.qvalue = qvalue_Protein1,
protein2.URPtoProt.qvalue =qvalue_Protein2
) %>%
mutate(mean.URP.log2ratio = round(mean.URP.log2ratio, 2),
sd.URP.log2ratios = round(sd.URP.log2ratios, 2),
pvalue = round(pvalue,2),
qvalue = round(qvalue, 2),
protein1.log2ratio = round(protein1.log2ratio,2),
protein2.log2ratio = round(protein2.log2ratio, 2),
protein1.URPtoProt.pvalue = round(protein1.URPtoProt.pvalue,2),
protein2.URPtoProt.pvalue = round(protein2.URPtoProt.pvalue,2),
protein1.URPtoProt.qvalue= round(protein1.URPtoProt.qvalue,2),
protein2.URPtoProt.qvalue = round(protein2.URPtoProt.qvalue,2)
)
URPsSigForSupp
write.table(URPsSigForSupp, file = "Tab2_TableS4.txt", row.names = FALSE, quote = FALSE, sep = "\t")
Investigating URPs with signal in only one strain, but also replicatable across label swap replicates
PepsWithoutRatios <- anti_join(MethylQuantOutput, PepsWithRatios, by = "URP_Key")
PepsWithOnlyOneChanelOnly <- PepsWithoutRatios %>%
separate(ResidueKey1, c("Accession1", "Residue1"), sep = "-", remove = FALSE) %>%
separate(ResidueKey2, c("Accession2", "Residue2"), sep = "-", remove = FALSE) %>%
distinct() %>%
mutate(TotalLightIntensity = `Light Intensity 1 IsotopeCorrelation` + `Light Intensity 2 IsotopeCorrelation` + `Light Intensity 3 IsotopeCorrelation`) %>%
mutate(TotalHeavyIntensity = `Heavy Intensity 1 IsotopeCorrelation` + `Heavy Intensity 2 IsotopeCorrelation` + `Heavy Intensity 3 IsotopeCorrelation` ) %>%
mutate(OnlyHeavy = if_else(TotalLightIntensity == 0 & TotalHeavyIntensity !=0, TRUE, FALSE)) %>%
mutate(OnlyLight = if_else(TotalHeavyIntensity == 0 & TotalLightIntensity !=0, TRUE, FALSE)) %>%
group_by(URP_Key, Replicate, Accession1, Residue1, Accession2, Residue2) %>%
distinct() %>%
summarise(AllOnlyLight = all(OnlyLight),
AllOnlyHeavy = all(OnlyHeavy)) %>%
ungroup() %>%
mutate(UniqueToOneChannel = AllOnlyLight + AllOnlyHeavy) %>%
filter(UniqueToOneChannel == 1) %>%
group_by(URP_Key, Accession1, Residue1, Accession2, Residue2) %>%
summarise(NumberReps = n(), SumAllLight = sum(AllOnlyLight), SumAllHeavy = sum(AllOnlyHeavy)) %>%
ungroup()
## `summarise()` has grouped output by 'URP_Key', 'Replicate', 'Accession1',
## 'Residue1', 'Accession2'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'URP_Key', 'Accession1', 'Residue1',
## 'Accession2'. You can override using the `.groups` argument.
PepsWithOneChanelOnly_OppositeInLabelSwap <- PepsWithOnlyOneChanelOnly %>%
filter(NumberReps == 2, SumAllLight == 1, SumAllHeavy == 1)
NROW(PepsWithOneChanelOnly_OppositeInLabelSwap)
## [1] 0
To determine degree of label mixing in crosslinked spectra
Peps <- read.delim("Data/XLinkProphet/iprophet.pep.xml.tsv")
#1% FDR at peptide level controlled by iProphet in TPP
Filtered <- Peps %>%
filter(probability >= 0.6909)
Light <- Filtered %>%
filter(str_detect(modified_peptide, "325"))
Heavy <- Filtered %>%
filter(str_detect(modified_peptide, "333"))
Light_scans = unique(Light$spectrum)
Heavy_scans = unique(Heavy$spectrum)
#total number of scans with IDs
length(unique(Filtered$spectrum))
## [1] 22973
#total numbers of scans that had both heavy and light IDd assigned to them
intersect(Light_scans, Heavy_scans)
## [1] "04112020_Hpm1KO_FWD_F10_XL_5uL_1_000_A.19365.19365.1"
## [2] "04112020_Hpm1KO_FWD_F10_XL_5uL_1_000_A.21894.21894.1"
## [3] "04112020_Hpm1KO_FWD_F10_XL_5uL_1_000_A.27696.27696.1"
## [4] "04112020_Hpm1KO_FWD_F10_XL_5uL_2_000_B.19311.19311.1"
## [5] "04112020_Hpm1KO_FWD_F10_XL_5uL_2_000_B.20812.20812.1"
## [6] "04112020_Hpm1KO_FWD_F10_XL_5uL_2_000_B.20822.20822.1"
## [7] "04112020_Hpm1KO_FWD_F10_XL_5uL_2_000_B.20838.20838.1"
## [8] "04112020_Hpm1KO_FWD_F10_XL_5uL_2_000_B.22835.22835.1"
## [9] "04112020_Hpm1KO_FWD_F11_XL_5uL_1_000_B.18618.18618.1"
## [10] "04112020_Hpm1KO_FWD_F11_XL_5uL_1_000_B.18624.18624.1"
## [11] "04112020_Hpm1KO_FWD_F11_XL_5uL_1_000_B.18659.18659.1"
## [12] "04112020_Hpm1KO_FWD_F11_XL_5uL_1_000_B.18808.18808.1"
## [13] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.18574.18574.1"
## [14] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.18613.18613.1"
## [15] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.18720.18720.1"
## [16] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.18785.18785.1"
## [17] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.19778.19778.1"
## [18] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_006_B.19778.19778.1"
## [19] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.22909.22909.1"
## [20] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.24052.24052.1"
## [21] "04112020_Hpm1KO_FWD_F11_XL_5uL_2_000_B.25681.25681.1"
## [22] "04112020_Hpm1KO_FWD_F12_XL_5uL_1_000_B.19435.19435.1"
## [23] "04112020_Hpm1KO_FWD_F12_XL_5uL_1_000_B.20809.20809.1"
## [24] "04112020_Hpm1KO_FWD_F12_XL_5uL_2_000_B.19541.19541.1"
## [25] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_1_000_B.18892.18892.1"
## [26] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_1_000_A.21339.21339.1"
## [27] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_1_000_B.23808.23808.1"
## [28] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_1_000_A.28845.28845.1"
## [29] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_2_000_B.15021.15021.2"
## [30] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_2_003_A.23098.23098.1"
## [31] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_2_000_B.23750.23750.1"
## [32] "04112020_Hpm1KO_FWD_F13_15_XL_5uL_2_003_A.28893.28893.1"
## [33] "04112020_Hpm1KO_FWD_F8_9_XL_5uL_2_001_A.25380.25380.1"
## [34] "04112020_Hpm1KO_RV_F10_XL_5uL_1_000_B.18595.18595.1"
## [35] "04112020_Hpm1KO_RV_F10_XL_5uL_1_002_B.20810.20810.1"
## [36] "04112020_Hpm1KO_RV_F10_XL_5uL_2_000_B.18393.18393.1"
## [37] "04112020_Hpm1KO_RV_F10_XL_5uL_2_000_B.18399.18399.1"
## [38] "04112020_Hpm1KO_RV_F10_XL_5uL_2_000_B.18596.18596.1"
## [39] "04112020_Hpm1KO_RV_F10_XL_5uL_2_000_A.21083.21083.1"
## [40] "04112020_Hpm1KO_RV_F11_XL_5uL_1_000_B.18193.18193.1"
## [41] "04112020_Hpm1KO_RV_F11_XL_5uL_1_000_B.19327.19327.1"
## [42] "04112020_Hpm1KO_RV_F11_XL_5uL_2_000_B.18188.18188.1"
## [43] "04112020_Hpm1KO_RV_F11_XL_5uL_2_000_B.18295.18295.1"
## [44] "04112020_Hpm1KO_RV_F11_XL_5uL_2_000_B.18320.18320.1"
## [45] "04112020_Hpm1KO_RV_F11_XL_5uL_2_001_B.19348.19348.1"
## [46] "04112020_Hpm1KO_RV_F11_XL_5uL_2_000_B.19348.19348.1"
## [47] "04112020_Hpm1KO_RV_F11_XL_5uL_2_000_B.19351.19351.1"
## [48] "04112020_Hpm1KO_RV_F11_XL_5uL_2_000_B.19356.19356.1"
## [49] "04112020_Hpm1KO_RV_F11_XL_5uL_2_001_B.20127.20127.1"
## [50] "04112020_Hpm1KO_RV_F12_XL_5uL_2_001_A.38853.38853.1"
## [51] "04112020_Hpm1KO_RV_F13_15_XL_5uL_2_000_A.11206.11206.1"
For Supp comparison on replicates
FWD_URPs_quantified <- URPsQuantified %>%
filter(Replicate == "FWD") %>%
select(URP_Key, Replicate, AvgCorrectXLRatio)
RV_URPs_quantified <- URPsQuantified %>%
filter(Replicate == "RV") %>%
select(URP_Key, Replicate, AvgCorrectXLRatio)
URPs_Both_SuppFig <- left_join(FWD_URPs_quantified, RV_URPs_quantified, by = "URP_Key")
SigURPs <- ForFig %>%
select(URP_Key, MeanLog2Ratio) %>%
mutate(Sig = TRUE)
URPs_Both_SuppFig <- left_join(URPs_Both_SuppFig, SigURPs, by = "URP_Key")
URPs_Both_SuppFig <- URPs_Both_SuppFig %>%
filter(!is.na(Replicate.x)) %>%
filter(!is.na(Replicate.y)) %>%
mutate(Direction = case_when(
(MeanLog2Ratio >= 0.3) & (Sig == TRUE) ~ "Up",
(MeanLog2Ratio <= -0.3) & (Sig == TRUE) ~ "Down")) %>%
mutate(Direction = replace_na(Direction, "Not"),
Sig = replace_na(Sig, FALSE))
ggplot(URPs_Both_SuppFig,aes(x = AvgCorrectXLRatio.x, y = AvgCorrectXLRatio.y, color = Direction)) +
geom_jitter() +
scale_color_manual(values = c( "#0000FF", "#bababa", "#D90000")) +
theme_bw() +
theme(legend.position = "none",
text=element_text(size=10, family="Arial"),
panel.border = element_rect(fill=NA, colour = "black", size=1),
panel.grid.minor = element_blank()) +
xlim(-4, 4) +
ylim(-4,4) +
geom_abline(intercept =0 , slope = 1)
## Warning: Removed 2 rows containing missing values (geom_point).
ggsave(filename = "S4_URP.pdf", device = cairo_pdf,
width = 3.5, height = 3.5, units = "in")
## Warning: Removed 2 rows containing missing values (geom_point).
FWD_prot_quantified <- filteredProteinGroups %>%
filter(Replicate == "FWD") %>%
select( Accession1, NormProteinLog2_KO_WT_ratio)
RV_prot_quantified <- filteredProteinGroups %>%
filter(Replicate == "RV") %>%
select( Accession1, NormProteinLog2_KO_WT_ratio)
Prot_Both_quantified <- left_join(FWD_prot_quantified, RV_prot_quantified, by = "Accession1")
Prot_Both_quantified <- Prot_Both_quantified %>%
filter(!is.na(NormProteinLog2_KO_WT_ratio.x)) %>%
filter(!is.na(NormProteinLog2_KO_WT_ratio.y))
Prot <- ggplot(Prot_Both_quantified,aes(x = NormProteinLog2_KO_WT_ratio.x, y = NormProteinLog2_KO_WT_ratio.y)) +
geom_jitter(color ="#bababa" ) +
theme_bw() +
theme(legend.position = "none",
text=element_text(size=10, family="Arial"),
panel.border = element_rect(fill=NA, colour = "black", size=1),
panel.grid.minor = element_blank()) +
xlim(-4, 4) +
ylim(-4, 4) +
geom_abline(intercept =0 , slope = 1)
Prot
ggsave(filename = "S5_Prot.pdf", device = cairo_pdf,
width = 3.5, height = 3.5, units = "in")